Preprocessing

# Packages
#library("CyTOF") # Need to adjust name
library("devtools")
library(ruv)
library(rsvd)
load_all() # Load my package
library("cytofkit")
library("gridExtra")
library("ggpubr")
library("tidyverse")
library("FlowSOM")
library("RColorBrewer")
# Takes list of plots, character vector of titles
add_titles <- function(plots, titles){
  map2(plots, titles, function(x, y) x + labs(titles = y))
}
set.seed(42)
titles <- c("Raw Simulated", "RUVIII Simulated")
setwd("/home/users/allstaff/pullin.j/CyTOF-RUVIII/Data")
# NB: the first file must be the raw one
files <- c("sim_data.rds")
data_list <- map(files, ~readRDS(.))

n_raw_files <- 1
n_data <- length(data_list)
samples <- params$samples

data_list[[1]] <- data_list[[1]] %>%
  filter(sample %in% samples) %>% 
  mutate(ind = 1:nrow(.)) %>% 
  group_by(sample) %>% 
  sample_n(1000) %>% 
  ungroup() %>% 
  as.data.frame()

index <- as.vector(as.matrix(select(data_list[[1]], ind)))
data_list[[1]] <- select(data_list[[1]], -ind)

if(n_raw_files != n_data){
  # Downsample other files
  for(i in (n_raw_files+1):n_data){
    data_list[[i]] <- data_list[[i]] %>%
      filter(sample %in% samples) %>% 
      slice(index) %>% 
      as.data.frame()
  }
}
norm_clus<- params$norm_clusters
k <- params$k

norm_data <- normalise_data(data_list[[1]], norm_clus, k, num_clusters = 3)
data_list[[length(data_list) + 1]] <- norm_data

n_data <- length(data_list)
# Perform tSNE on the data
tsne_list <- map(data_list, function(x) compute_tsne(x, 5000))
##   Running t-SNE...with seed 42  DONE
##   Running t-SNE...with seed 42  DONE

PCA

plot_pca_samp <- map(data_list, ~plot_scpca_samp(., N = 2000))
plot_pca_samp <- flatten(plot_pca_samp)
plot_pca_samp <- add_titles(plot_pca_samp, rep(titles, each = 3))
ggarrange(plotlist = plot_pca_samp, ncol = 3, nrow = 3, common.legend = TRUE, legend = "right")

t-SNE

t-SNE coloured by sample

plot_tsne_samp <- map(tsne_list, plot_tsne_sample)
plot_tsne_samp <- add_titles(plot_tsne_samp, titles)
ggarrange(plotlist = plot_tsne_samp, ncol = 2, nrow = 2, common.legend = TRUE, legend = "right")

F statistics

plot_Ftsne <- map(tsne_list, tsne_F_stats)
plot_Ftsne <- flatten(plot_Ftsne)
# Note that the axes on the plots are not the same
ggarrange(plotlist = plot_Ftsne, ncol = 2, nrow = 3)

t-SNE coloured by cluster

plot_tsne_clus <- map(tsne_list, plot_tsne_cluster)
plot_tsne_clus <- add_titles(plot_tsne_clus, titles)
ggarrange(plotlist = plot_tsne_clus, ncol = 2, nrow = 2)

Cluster Frequency

clus_freq_plot <- map(data_list, plot_cluster_freq)
clus_freq_plot <- flatten(clus_freq_plot)
ggarrange(plotlist = clus_freq_plot, ncol = 2, nrow = 3)

Median marker intensity

median_exprs_plots <- map(data_list, plot_median_exprs)
median_exprs_plots <- add_titles(median_exprs_plots, titles)
ggarrange(plotlist = median_exprs_plots, ncol = 2, nrow = 2, legend = "bottom")

Marker Densities

marker_densities <- map(data_list, plot_marker_densities)
marker_densities <- add_titles(marker_densities, titles)
ggarrange(plotlist = marker_densities, ncol = 2, nrow = 2, common.legend = TRUE, legend = "bottom")

Cluster Matching

clust_match_plot <- plot_cluster_match(data_list[[1]], data_list[[2]])
clust_match_plot <- clust_match_plot + labs(x = titles[[1]], y = titles[[2]])
clust_match_plot